package diona.emotions

import io.Source
import diona.nlp.PorterStemmer

/**
 * Strips punctuation and stop words from text and stems the remaining words.
 *
 * The stop word list is loaded once, on first access to [[stopWords]], from the
 * classpath resource `/emotions/general/stopwords.txt` (read as UTF-8, one entry
 * per line).
 *
 * User: anton
 * Date: 21.06.11 22:27
 */
object RussianStopwordsFilter {
  /** Classpath location of the stop word list. */
  private val StopWordsResource = "/emotions/general/stopwords.txt"

  /**
   * ASCII punctuation/symbols (`\p{Punct}`) plus every Unicode punctuation
   * character (`\p{P}`), so that guillemets, dashes, ellipses etc. are stripped
   * as well. Compiled once instead of on every call.
   */
  private val Punctuation = java.util.regex.Pattern.compile("[\\p{Punct}\\p{P}]")

  /**
   * Stop words in both their listed form and their stemmed form. The stemmed
   * form is needed because [[filterAndStem]] looks tokens up after stemming them.
   */
  lazy val stopWords : Set[String] = getStopWords

  /**
   * Reads the stop word resource and builds the lookup set.
   *
   * Lines are trimmed and blank lines are skipped; every remaining line
   * contributes itself and its stem.
   *
   * @throws IllegalStateException if the resource is not on the classpath
   */
  private def getStopWords: Set[String] = {
    // getResourceAsStream returns null for a missing resource; fail with a clear message instead of an NPE.
    val stream = Option(this.getClass.getResourceAsStream(StopWordsResource)).getOrElse(
      throw new IllegalStateException("Stop word list not found on classpath: " + StopWordsResource))
    val source = Source.fromInputStream(stream, "utf-8")
    try {
      // toSet forces the lazy line iterator, so everything is read before the source is closed.
      source.getLines()
        .map(line => line.trim)
        .filter(line => line.length > 0)
        .flatMap(word => Iterator(word, PorterStemmer.stem(word)))
        .toSet
    } finally {
      source.close()
    }
  }

  /**
   * Normalizes `text` into a space-separated sequence of stemmed, non-stop words.
   *
   * Steps: punctuation is replaced by spaces, the text is lower-cased, split on
   * runs of whitespace, each token is stemmed, and tokens that are blank or
   * contained in [[stopWords]] are dropped.
   *
   * @param text the raw text to process
   * @return the surviving stems joined by single spaces (empty if none survive)
   */
  def filterAndStem(text: String) : String = {
    Punctuation.matcher(text).replaceAll(" ")
      // Locale-independent lower-casing: the result must not vary with the JVM default locale.
      .toLowerCase(java.util.Locale.ROOT)
      // Split on any whitespace run so tabs and line breaks separate words too.
      .split("\\s+").iterator
      .map(token => PorterStemmer.stem(token))
      .filterNot(stem => stem.trim().length() == 0 || stopWords.contains(stem))
      .mkString(" ")
  }
}